********************************************************************************************************************************
***   Replication file for:                                                                                                  ***
***   Berbee, P., Braun, S. T. and Franke, R. (2024). Reversing Fortunes of German Regions, 1926-2019. JoEG.			     ***
***   							                                                                                             ***
***   SCRIPT: 	_x2f_preparation_census_1950_87.do																			 ***	
***   PURPOSE: 	Reads in and prepares data from the harmonized 1950-1987 censuses										 	 ***
***				(Gesis file ZA2472)																							 ***
********************************************************************************************************************************

* Preamble: execute the configuration script without echoing its commands.
* (Unnecessary when this script is executed through run.do.)
run "$reversing/scripts/programs/_config.do"

*------------------------------------------------------------------------------
* Code begins
*------------------------------------------------------------------------------

*** Import data
* The first row of the workbook is read as variable names; any data currently
* in memory are discarded.
import excel using "$reversing/data/ZA2472_1.xlsx", firstrow clear

** Create variables of interest

** Education (1970 census)
* The combined variables vzc209 and vzc210 are used rather than the separate
* variables vzc077-vzc080, because the latter are mostly not filled.
* Per the original note, vzc209 covers Berufsfachschule / Ingenieur and vzc210
* covers Abitur / Mittlere Reife.
* NOTE(review): the assignments below map vzc209 to mhsreife_1970 and vzc210 to
* berufsfach_1970, which looks reversed relative to that note -- confirm against
* "ZA2472_Dokumentation" which of the two (note or code) is correct.
* "ZA2472_Dokumentation" explicitly mentions that these are the highest degree attained.
generate volksschule_1970 = vzc076
generate mhsreife_1970    = vzc209
generate berufsfach_1970  = vzc210
generate hochschule_1970  = vzc081

* Possible normalization for the education counts: population above 18, since
* education statistics are not available by age.
* Alternative: simply sum over the different degrees.

* population_ü15_1970 adds up vzc216 through vzc223; population_ü18_1970 is the
* same sum without its first component, vzc216.
* A missing value in any component makes the whole sum missing.
generate population_ü15_1970 = vzc216 + vzc217 + vzc218 + vzc219 + vzc220 + vzc221 + vzc222 + vzc223
generate population_ü18_1970 = vzc217 + vzc218 + vzc219 + vzc220 + vzc221 + vzc222 + vzc223

* Establishments: sum of the three source variables vzc133-vzc135.
generate arbeitsstätten_prod_1970 = vzc133 + vzc134 + vzc135
label variable arbeitsstätten_prod_1970 "Number of establishments in industry (incl. mining and construction)"


** Foreigners, population and unemployed
* NOTE(review): the original section title also mentioned GDR refugees, but no
* refugee variable is created in this section.

* Foreigners (no 1950 variable is created)
generate auslaender_1961 = vzb044
generate auslaender_1970 = vzc061
generate auslaender_1987 = vz87i04k

* Population, one variable per census year
generate bevoelkerung_1950 = vza001
generate bevoelkerung_1961 = vzb001
generate bevoelkerung_1970 = vzc001
generate bevoelkerung_1987 = vz87i07k

* Unemployed (1987 only)
generate erwerbslose_1987 = vz87i64k


** Employment by sector
** Notes:
* For consistency, the overall number of economically active persons is always
* computed as the sum across the sector variables.
* The labor force category does not always seem to be 100% comparable, as the
* 1987 census uses a different concept.
* 1970: "Erwerbspersonen" is used instead of "Erwerbstätige" for consistency
*       with previous years.
* 1961 and 1950: "Erwerbspersonen" is used instead of "Erwerbstätige" because
*       of missings.

* Four source variables per census year, in the order in which they are used
* below: 1st -> landforst, 2nd -> prod, 3rd and 4th -> sonst.
local sectors_1987 vz87i48k vz87i49k vz87i50k vz87i51k
local sectors_1970 vzc089 vzc090 vzc091 vzc092
local sectors_1961 vzb082 vzb083 vzb084 vzb085
local sectors_1950 vza025 vza026 vza027 vza028

* Years are processed newest first so that the new variables are created in
* the same order as before (this fixes their column order in the dataset).
foreach yr in 1987 1970 1961 1950 {
	* Split the year's list into the positional macros 1 to 4.
	tokenize `sectors_`yr''
	generate erwerbst_`yr'           = `1' + `2' + `3' + `4'
	generate erwerbst_landforst_`yr' = `1'
	generate erwerbst_prod_`yr'      = `2'
	generate erwerbst_sonst_`yr'     = `3' + `4'
}

* Self-employment, one variable per census year
generate selfemployed_1950 = vza021
generate selfemployed_1961 = vzb073
generate selfemployed_1970 = vzc107
generate selfemployed_1987 = vz87i57k

* Apprentices (no 1950 variable is created)
* The 1987 total is the sum of two source variables; the second of them,
* vz87i62k, is also stored on its own as lehrlinge_gewerblich_1987 below.
generate lehrlinge_1961 = vzb078
generate lehrlinge_1970 = vzc111
generate lehrlinge_1987 = vz87i60k + vz87i62k

* Apprentices, "gewerblich" subset (1970 and 1987 only)
generate lehrlinge_gewerblich_1970 = vzc116
generate lehrlinge_gewerblich_1987 = vz87i62k


* Save data

* The identifier column is imported under the name Sheet1__La; give it its
* final name.
rename Sheet1__La labor_market_id

* Keep the identifier plus every variable whose name ends in one of the four
* census years (i.e. the variables created above).
keep labor_market_id  *1950 *1961 *1970 *1987

* destring leaves the variable as a string (printing only a note in the log)
* when it encounters non-numeric characters. Verify that the identifier really
* is numeric before sorting and saving, so that a failed conversion stops the
* script here instead of silently writing a string key to the output file.
destring labor_market_id, replace
confirm numeric variable labor_market_id
sort labor_market_id

save "$reversing/processed/intermediate/VZ1950_87.dta", replace

*** EOF
